## Loading required package: grid
En este Markdown se intenta representar el número de botnets activas. El dataset utilizado se actualiza cada 5 minutos.
La fuente de los datos utilizados es accesible desde el siguiente enlace: <https://feodotracker.abuse.ch/>.
Mapa con la ubicación de las botnets.
library(maps)
library(dplyr)
library(leaflet)
library(Group4)
library(ggplot2)
library(RcmdrMisc)
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: sandwich
# Build the dataset only when it is not already defined in this session.
# exists() checks for an R object named "df.feo"; file.exists() would look
# for a file of that name on disk and could leave df.feo undefined.
if (!exists("df.feo")) {
  df.feo <- maxmindg4.df()
}
## [1] "[*] Initial setup"
## [1] "[*] Read data from source"
## chr "data.frame"
## Type of dataset :
## Dimension(row x column) : 348 9
## Current lenght: 9 & Object size: 57024
## List of 9
## $ DetectedDate : chr [1:2] "POSIXct" "POSIXt"
## $ DstIP : chr "character"
## $ DstPort : chr "integer"
## $ LastOnlineDate : chr [1:2] "POSIXct" "POSIXt"
## $ Malware : chr "character"
## $ DetectedWeekday: chr "factor"
## $ continent_name : chr "character"
## $ country_code : chr "character"
## $ country_name : chr "character"
## Types of dataset fields: Printing
##
## print done
## Now let's see the values of all non-repeated fields
## 'data.frame': 348 obs. of 9 variables:
## $ DetectedDate : POSIXct, format: "2019-05-30 08:28:38" "2019-05-30 08:16:35" ...
## $ DstIP : chr "185.244.149.206" "94.23.174.183" "185.61.149.38" "176.223.133.178" ...
## $ DstPort : int 447 447 447 447 443 447 447 447 443 447 ...
## $ LastOnlineDate : POSIXct, format: "2019-06-01" "2019-06-02" ...
## $ Malware : chr "TrickBot" "TrickBot" "TrickBot" "TrickBot" ...
## $ DetectedWeekday: Factor w/ 7 levels "Friday","Monday",..: 5 5 5 5 5 5 7 7 7 7 ...
## $ continent_name : chr NA "Europe" "Europe" "Europe" ...
## $ country_code : chr NA "CZ" "LV" "RO" ...
## $ country_name : chr NA "Czech Republic" "Latvia" "Romania" ...
## Structure of the dataset fields:
## [1] "[*] Read RAW data from MaxMind"
## [1] "[*] Subseting scans data set"
## [1] "[*] Expanding MaxMind network ranges"
## [1] "[*] Foreach IP (source and destination) identify network range using parallel computing"
## [1] "[*] Joining source IP's with geolocation data"
## [1] "[*] Tidy data and save it"
# Interactive leaflet map: default tiles, a world-level view, and one circle
# per row of df.feo positioned by its longitude/latitude columns.
leaflet(data = df.feo) %>%
  addTiles() %>%
  setView(lng = 0, lat = 10, zoom = 2) %>%
  addCircles(lng = ~longitude, lat = ~latitude)
# Alternative map: world polygons from the maps package, filled with a
# topo.colors palette, with the botnet locations overlaid as red circles.
worldleafmap <- map(
  database = "world",
  fill = TRUE,
  plot = FALSE,
  wrap = c(-180, 180, NA)
)
worldmap <- leaflet(data = worldleafmap) %>%
  addTiles() %>%
  addPolygons(stroke = FALSE, fillColor = topo.colors(10, alpha = NULL))
addCircles(
  worldmap,
  lng = df.feo$longitude,
  lat = df.feo$latitude,
  radius = 50,
  color = "#ff0000"
)
# Bar chart of how many rows of df.feo belong to each Malware value.
barplot(
  table(df.feo[["Malware"]]),
  main = "Botnet type distribution",
  col = "red"
)
# Density plots of the detection and last-online timestamps.
# ggplot() + geom_density() replaces the deprecated qplot(); mapping the bare
# column names (instead of df.feo$...) also yields readable axis labels.
# Earlier histogram variants are kept below for reference.
# hist(x = df.feo$DetectedDate, breaks = 10, col = "green", main = "detected date distribution")
ggplot(df.feo, aes(x = DetectedDate)) +
  geom_density(colour = "green")
# hist(x = df.feo$LastOnlineDate, breaks = 10, col = "blue", main = "Last online by date")
ggplot(df.feo, aes(x = LastOnlineDate)) +
  geom_density(colour = "blue")
# Bar chart of how many detections fall on each weekday.
# The title previously repeated "Botnet type distribution" from the malware
# plot; it now describes the weekday counts actually shown.
barplot(
  height = table(df.feo$DetectedWeekday),
  col = "orange",
  main = "Detected weekday distribution"
)
# Count plot of detection weekday against continent, coloured by malware.
# Columns are referenced by bare name inside aes(): using df.feo$... there
# bypasses the plot's data argument and produces legend titles such as
# "df.feo$Malware".
g <- ggplot(
  df.feo,
  aes(x = DetectedWeekday, y = continent_name, color = Malware)
)
g +
  geom_count() +
  labs(title = "complete continent & weekday data") +
  xlab("weekday") +
  ylab("Continent Name")
# Attempt a k-means clustering to look for groups of related observations.
# Only the DstIP, latitude and longitude columns are passed to the algorithm.
# NOTE(review): the printed cluster means show DstIP in the 1e9 range while
# latitude/longitude are in the tens, and the within-cluster sums of squares
# are ~1e18, so the unscaled distance appears to be driven almost entirely by
# DstIP. Consider scaling the columns (or dropping DstIP) -- confirm intent.
df.feo3 <- df.feo[, c("DstIP", "latitude", "longitude")]
df.feo3 <- RcmdrMisc::KMeans(
  df.feo3,
  centers = 4,
  iter.max = 40,
  num.seeds = 5
)
# Name the new column explicitly: an unnamed cbind() argument is labelled with
# its deparsed expression ("df.feo3$cluster"), which makes df.feo2$cluster NULL.
df.feo2 <- cbind(df.feo, cluster = df.feo3$cluster)
df.feo3
## K-means clustering with 4 clusters of sizes 101, 39, 34, 174
##
## Cluster means:
## DstIP latitude longitude
## 1 1435526338 38.93465 -1.915581
## 2 530735882 37.08945 -2.736110
## 3 2472830728 31.39852 12.865953
## 4 3202307007 12.83373 -38.162090
##
## Clustering vector:
## [1] 4 1 4 4 1 4 4 4 1 4 1 4 1 4 4 4 2 4 1 1 4 2 4 4 4 4 3 3 1 4 4 1 4 4 3
## [36] 2 1 4 1 1 1 1 1 1 1 1 2 2 4 3 1 2 4 3 1 1 1 3 4 2 2 3 2 1 2 4 1 4 1 1
## [71] 1 1 4 4 4 4 1 1 2 4 3 2 4 1 1 4 1 4 1 1 4 4 4 1 4 1 4 1 2 4 4 2 4 2 1
## [106] 1 3 2 1 4 4 4 1 3 4 4 4 4 4 4 4 1 4 1 4 4 4 4 4 4 4 4 4 4 1 1 2 4 4 4
## [141] 4 1 4 1 4 1 4 4 1 4 4 1 4 4 3 4 1 4 4 4 4 1 4 3 1 2 4 4 3 1 3 4 4 4 4
## [176] 4 4 1 4 3 1 4 4 4 1 1 4 4 4 3 4 1 1 4 4 4 2 4 3 3 4 1 4 2 4 3 4 1 1 4
## [211] 4 2 4 4 1 4 4 2 4 4 1 3 4 1 1 1 4 4 4 1 1 4 1 1 4 4 3 3 4 2 1 4 4 3 2
## [246] 4 1 4 1 4 4 4 4 1 4 4 2 3 2 4 2 4 4 4 4 1 4 3 1 4 1 2 4 1 1 4 4 3 4 2
## [281] 4 4 1 4 4 4 1 1 3 1 1 1 3 4 3 3 4 1 4 4 4 2 4 4 4 2 2 4 1 4 4 1 1 4 4
## [316] 1 4 4 1 1 1 2 2 2 2 2 4 4 3 4 4 2 4 2 3 1 4 4 4 3 4 4 1 1 1 3 4 1
##
## Within cluster sum of squares by cluster:
## [1] 6.035829e+18 3.135394e+18 2.245560e+18 6.583097e+18
## (between_SS / total_SS = 95.0 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
# Split the observations into one data frame per k-means cluster.
# The cluster vector is read from the KMeans result (df.feo3) directly:
# df.feo2 was built with an unnamed cbind() argument, so it has no column
# called "cluster" and df.feo2$cluster evaluated to NULL, which selected
# zero rows for every subset.
df.feocluster1 <- df.feo[df.feo3$cluster == 1, ]
df.feocluster2 <- df.feo[df.feo3$cluster == 2, ]
df.feocluster3 <- df.feo[df.feo3$cluster == 3, ]
df.feocluster4 <- df.feo[df.feo3$cluster == 4, ]
# Results yet to be analyzed
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.